

************ This program generates placebo networks to run the analysis in Table 3, Panel A, of the paper ***************

* Session setup.
* Turn off -more- pauses (stored permanently in the user's Stata settings), empty memory,
* define the output-folder global and move into the project's data folder.
* Both paths are built from the global macro localdir, which is not defined in this file
* and must already be set by whoever runs it.
set more off, permanently
clear all
global output "$localdir\Output"
cd "$localdir\Data"

******** Placebo with random j **************

*** Steps
* Data set 1: unique, unique_j, muni_j, industry1_j, year; count variable by unique-year-muni_j-industry1_j (from RF data set, i.e. i-outcomes on k-network)
* Data set 2: Data set 1 without unique_j
* New data from data set 1 with unique_j, muni_j, industry1_j; count randomly assigned within year-muni_j-industry1_j, for all non-i j's
* Merge random j's on data set 2 for each i by year-muni_j-industry1_j-count; keep unique, unique_j (the new placebo j's)
* Merge actual k's on fake (random) j's -> new i-j-k data, collapse to i-k
* Merge in i outcomes
* Run RF (main analysis) on new placebo network (separate do-file)

* Municipalities
* Build the person-year municipality lookup saved as "Municipalities".
* Only register years 2009-2012 are loaded, and year is then moved forward by one, so the
* municipality recorded in register year t is stored under year t+1.
use muni year pnr if inrange(year, 2009, 2012) using "registerdata19802012_us.dta", clear
replace year = year + 1

* Merge in our unique identifier number; persons without a match in the crosswalk are
* discarded, as are crosswalk entries with no register record. pnr is not kept.
merge m:1 pnr using "Unique ID (pnr).dta", keep(match) nogenerate
drop pnr

save "Municipalities", replace

* Municipality and industry for j
* Attach a 1-digit industry code and a municipality to every j in the i-j-k link file,
* then write the two working files used by the random assignment:
*   data2 = the slots of each i (one per i-j-year link) onto which random j's are merged
*   data1 = the i-j-year links that serve as the pool from which random j's are drawn
use "i-j-k links for RF", clear

* Both lookup files are merged on the variable unique, so j borrows that name for now
rename unique unique_i
rename unique_j unique

* Industry of j. Links with no record in the industry file get code 999;
* industry records that match no link are not brought in.
merge m:1 unique year using "Industry codes.dta", keepusing(industry_code_1_digit) keep(master match)
replace industry_code_1_digit = 999 if _merge==1
drop _merge

* Municipality of j. There is no fallback code here: unmatched links keep a missing muni.
merge m:1 unique year using "Municipalities", keepusing(muni) keep(master match) nogenerate

* Restore the i/j naming and label the new variables as belonging to j
rename unique unique_j
rename unique_i unique
rename muni muni_j
rename industry_code_1_digit industry_j

* Unique i-j-year observations.
* After the keep, these five variables are the whole dataset, so dropping fully
* identical rows is the same as dropping duplicates on this variable list.
keep unique unique_j muni_j industry_j year
duplicates drop

* Consecutive index for each i, and one sampling cell per year x muni_j x industry_j
egen double id = group(unique)
egen double sample_group = group(year muni_j industry_j)
* Links without a cell number cannot take part in the draw
drop if missing(sample_group)

* Slot number of each link within (i, sampling cell)
bysort id sample_group: gen double count = _n

* Sample to merge on random j's
preserve
keep unique id year muni_j industry_j sample_group count
save data2, replace
restore

* Sample to draw random j's
keep id sample_group unique_j year
save data1, replace

* Assignment of random j's
* For each i in turn: take from data1 the links that lie in a sample group
* (year x muni_j x industry_j) in which i has at least one link, remove every j-year
* that is linked to i, shuffle the remaining j's within sample group, and merge them
* onto the slots in data2 by sample_group and count.
* Reads data1 and data2; rewrites data2 on every pass; writes i_random_j at the end.

* Fix the random-number seed so the placebo network does not depend on whatever used the
* random-number generator earlier in the session. 123456789 is Stata's documented
* start-up seed.
set seed 123456789

* Number of i's: read from the data instead of being hard-coded (the original used 8747)
use data2, clear
quietly summarize id, meanonly
local n_i = r(max)
if missing(`n_i') {
display as error "data2 contains no i's; nothing to assign"
exit 2000
}

forval i=1/`n_i' {
use data1, clear
display `i'
* Keep sample groups of i
g temp=(id==`i')
bysort sample_group: egen sample_group_of_i=max(temp)
keep if sample_group_of_i==1 
drop sample_group_of_i

* Remove j's of i
bysort unique_j year: egen j_of_i=max(temp)
drop if j_of_i==1 // none of i's j's included
drop j_of_i

drop temp id year

* Random ordering
* Sort on every remaining variable first. Rows that tie are then identical, so which j
* receives which draw depends only on the seed and not on how sort breaks ties.
sort sample_group unique_j
by sample_group: g double uni=runiform()
bysort sample_group (uni): g double count=_n

* Merge on i's
* Master = shuffled pool of j's, using = data2 (the slots of all i's).
* From the second pass on, data2 already holds unique_j; with update replace a nonmissing
* unique_j in data2 overrides the freshly drawn one, so earlier assignments are kept.
* NOTE(review): a slot of an earlier i that is still missing in data2 is filled from the
* pool drawn for the current i, which was not purged of that earlier i's own j's - confirm
* this is intended.
keep unique_j sample_group count
merge 1:m sample_group count using data2, update replace  // Only update missing observations in the using dataset
* Drop pool j's that found no slot (_merge==1); every code listed here is a data2 row
keep if _merge==2 | _merge==3 | _merge==5
drop _merge
* Slots of i's not processed yet are blanked again so they get their own draw later
replace unique_j=. if id>`i'
save data2, replace
}

* Final i-new j data set
* The data in memory are the result of the last pass of the loop
keep year unique unique_j
save i_random_j, replace

* Merge j-k's with new i-j sets
* Goal: give every i all the k's of each of its placebo j's, then reduce to i-k pairs.
* Reads i_random_j and the i-j-k link file; writes i_k_RF_placebo.

* Placebo i-j pairs, de-duplicated so the join below does not multiply identical rows
use i_random_j, clear
duplicates drop year unique unique_j, force
tempfile placebo_ij
save `placebo_ij'

* Actual j-k links
use "i-j-k links for RF", clear
keep year unique_j unique_k
duplicates drop year unique_j unique_k, force

* joinby forms every i x k combination within year-unique_j.
* This replaces a merge m:m, which pairs rows by their position within each
* year-unique_j group and therefore did not give an i all the k's of its placebo j.
* unmatched(both) keeps rows without a partner from either side, as the merge did.
joinby year unique_j using `placebo_ij', unmatched(both)
drop _merge

* Prepare for RF analysis (i-k level)
rename unique_k unique_network
keep year unique unique_network 
duplicates drop year unique unique_network, force
save i_k_RF_placebo, replace

* Data with k shock info
* One row per k-year, taken from the unemployment RF analysis file
use "Unemployment Outcomes RF for Analysis - Last Year.dta", clear
keep unique_network year grossunemp_enter_network muni_network
duplicates drop year unique_network, force
save k_info, replace

* Data with i outcomes for each relevant RF column
* Every extract keeps the identifier, the year, one outcome variable and the same set of
* additional variables, reduced to one row per i-year. The three numbered locals below
* give, for each extract, the source file, the outcome variable and the name of the
* saved file (i_<tag>).
local shared_vars network_size both ind2_year ocp_year educ_year muni

local src1 "Unemployment Outcomes RF for Analysis - Last Year.dta"
local var1 prob_unemployed
local tag1 unemployment

local src2 "UI Outcomes RF for Analysis - Last Year.dta"
local var2 more_ui
local tag2 ui

local src3 "Economic Policy Outcomes RF for Analysis - Last Year.dta"
local var3 gov_solution
local tag3 policy

local src4 "Voted RF for Analysis - Last Year.dta"
local var4 voted_left_2011
local tag4 voted

local src5 "Voting RF for Analysis - Last Year.dta"
local var5 vote_left
local tag5 voting

local src6 "Ideology RF for Analysis - Last Year.dta"
local var6 govt_more_help
local tag6 ideology

local src7 "National Unemployment RF for Analysis - Last Year.dta"
local var7 unemp_rate_estimate
local tag7 estimate

local src8 "National Unemployment Forecast RF for Analysis - Last Year.dta"
local var8 unemp_rate_estimate_next
local tag8 estimate_next

forvalues s = 1/8 {
    use "`src`s''", clear
    keep unique year `var`s'' `shared_vars'
    duplicates drop year unique, force
    save i_`tag`s'', replace
}
 
* Construct datasets for the new placebo RF
* Step 1: attach the k-level shock information to the placebo i-k pairs and store the
* result under the same file name. Rows from either side without a match are retained.
use i_k_RF_placebo, clear
merge m:1 year unique_network using k_info, nogenerate
save i_k_RF_placebo, replace

* Step 2: for each outcome, attach the i-level extract i_<outcome> and save one analysis
* file RF_placebo_<outcome>. Unmatched rows are again retained.
local outcome_list unemployment ui policy voted voting ideology estimate estimate_next
foreach outcome of local outcome_list {
    use i_k_RF_placebo, clear
    merge m:1 year unique using i_`outcome', nogenerate
    save RF_placebo_`outcome', replace
}


